In [8]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report 
import os
In [9]:
print (os.getcwd())
C:\Users\mannr
In [1]:
os.chdir('C:\\Users\\mannr\\OneDrive\\Documents\\acme data science')
print (os.getcwd())
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[1], line 1
----> 1 os.chdir('C:\\Users\\mannr\\OneDrive\\Documents\\acme data science')
      2 print (os.getcwd())

NameError: name 'os' is not defined
In [11]:
df=pd.read_csv('parkinsons.data')
display (df)
name MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer ... Shimmer:DDA NHR HNR status RPDE DFA spread1 spread2 D2 PPE
0 phon_R01_S01_1 119.992 157.302 74.997 0.00784 0.00007 0.00370 0.00554 0.01109 0.04374 ... 0.06545 0.02211 21.033 1 0.414783 0.815285 -4.813031 0.266482 2.301442 0.284654
1 phon_R01_S01_2 122.400 148.650 113.819 0.00968 0.00008 0.00465 0.00696 0.01394 0.06134 ... 0.09403 0.01929 19.085 1 0.458359 0.819521 -4.075192 0.335590 2.486855 0.368674
2 phon_R01_S01_3 116.682 131.111 111.555 0.01050 0.00009 0.00544 0.00781 0.01633 0.05233 ... 0.08270 0.01309 20.651 1 0.429895 0.825288 -4.443179 0.311173 2.342259 0.332634
3 phon_R01_S01_4 116.676 137.871 111.366 0.00997 0.00009 0.00502 0.00698 0.01505 0.05492 ... 0.08771 0.01353 20.644 1 0.434969 0.819235 -4.117501 0.334147 2.405554 0.368975
4 phon_R01_S01_5 116.014 141.781 110.655 0.01284 0.00011 0.00655 0.00908 0.01966 0.06425 ... 0.10470 0.01767 19.649 1 0.417356 0.823484 -3.747787 0.234513 2.332180 0.410335
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
190 phon_R01_S50_2 174.188 230.978 94.261 0.00459 0.00003 0.00263 0.00259 0.00790 0.04087 ... 0.07008 0.02764 19.517 0 0.448439 0.657899 -6.538586 0.121952 2.657476 0.133050
191 phon_R01_S50_3 209.516 253.017 89.488 0.00564 0.00003 0.00331 0.00292 0.00994 0.02751 ... 0.04812 0.01810 19.147 0 0.431674 0.683244 -6.195325 0.129303 2.784312 0.168895
192 phon_R01_S50_4 174.688 240.005 74.287 0.01360 0.00008 0.00624 0.00564 0.01873 0.02308 ... 0.03804 0.10715 17.883 0 0.407567 0.655683 -6.787197 0.158453 2.679772 0.131728
193 phon_R01_S50_5 198.764 396.961 74.904 0.00740 0.00004 0.00370 0.00390 0.01109 0.02296 ... 0.03794 0.07223 19.020 0 0.451221 0.643956 -6.744577 0.207454 2.138608 0.123306
194 phon_R01_S50_6 214.289 260.277 77.973 0.00567 0.00003 0.00295 0.00317 0.00885 0.01884 ... 0.03078 0.04398 21.209 0 0.462803 0.664357 -5.724056 0.190667 2.555477 0.148569

195 rows × 24 columns

In [12]:
import pandas_profiling as pf
display(pf.ProfileReport(df))
C:\Users\mannr\AppData\Local\Temp\ipykernel_22856\2072535499.py:1: DeprecationWarning: `import pandas_profiling` is going to be deprecated by April 1st. Please use `import ydata_profiling` instead.
  import pandas_profiling as pf
Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]
Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]
Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

In [14]:
 print(df.info())
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 24 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              195 non-null    object 
 1   MDVP:Fo(Hz)       195 non-null    float64
 2   MDVP:Fhi(Hz)      195 non-null    float64
 3   MDVP:Flo(Hz)      195 non-null    float64
 4   MDVP:Jitter(%)    195 non-null    float64
 5   MDVP:Jitter(Abs)  195 non-null    float64
 6   MDVP:RAP          195 non-null    float64
 7   MDVP:PPQ          195 non-null    float64
 8   Jitter:DDP        195 non-null    float64
 9   MDVP:Shimmer      195 non-null    float64
 10  MDVP:Shimmer(dB)  195 non-null    float64
 11  Shimmer:APQ3      195 non-null    float64
 12  Shimmer:APQ5      195 non-null    float64
 13  MDVP:APQ          195 non-null    float64
 14  Shimmer:DDA       195 non-null    float64
 15  NHR               195 non-null    float64
 16  HNR               195 non-null    float64
 17  status            195 non-null    int64  
 18  RPDE              195 non-null    float64
 19  DFA               195 non-null    float64
 20  spread1           195 non-null    float64
 21  spread2           195 non-null    float64
 22  D2                195 non-null    float64
 23  PPE               195 non-null    float64
dtypes: float64(22), int64(1), object(1)
memory usage: 36.7+ KB
None
In [15]:
display(df.describe())
MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer MDVP:Shimmer(dB) ... Shimmer:DDA NHR HNR status RPDE DFA spread1 spread2 D2 PPE
count 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 ... 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000 195.000000
mean 154.228641 197.104918 116.324631 0.006220 0.000044 0.003306 0.003446 0.009920 0.029709 0.282251 ... 0.046993 0.024847 21.885974 0.753846 0.498536 0.718099 -5.684397 0.226510 2.381826 0.206552
std 41.390065 91.491548 43.521413 0.004848 0.000035 0.002968 0.002759 0.008903 0.018857 0.194877 ... 0.030459 0.040418 4.425764 0.431878 0.103942 0.055336 1.090208 0.083406 0.382799 0.090119
min 88.333000 102.145000 65.476000 0.001680 0.000007 0.000680 0.000920 0.002040 0.009540 0.085000 ... 0.013640 0.000650 8.441000 0.000000 0.256570 0.574282 -7.964984 0.006274 1.423287 0.044539
25% 117.572000 134.862500 84.291000 0.003460 0.000020 0.001660 0.001860 0.004985 0.016505 0.148500 ... 0.024735 0.005925 19.198000 1.000000 0.421306 0.674758 -6.450096 0.174351 2.099125 0.137451
50% 148.790000 175.829000 104.315000 0.004940 0.000030 0.002500 0.002690 0.007490 0.022970 0.221000 ... 0.038360 0.011660 22.085000 1.000000 0.495954 0.722254 -5.720868 0.218885 2.361532 0.194052
75% 182.769000 224.205500 140.018500 0.007365 0.000060 0.003835 0.003955 0.011505 0.037885 0.350000 ... 0.060795 0.025640 25.075500 1.000000 0.587562 0.761881 -5.046192 0.279234 2.636456 0.252980
max 260.105000 592.030000 239.170000 0.033160 0.000260 0.021440 0.019580 0.064330 0.119080 1.302000 ... 0.169420 0.314820 33.047000 1.000000 0.685151 0.825288 -2.434031 0.450493 3.671155 0.527367

8 rows × 23 columns

In [16]:
display(df.isna())
name MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer ... Shimmer:DDA NHR HNR status RPDE DFA spread1 spread2 D2 PPE
0 False False False False False False False False False False ... False False False False False False False False False False
1 False False False False False False False False False False ... False False False False False False False False False False
2 False False False False False False False False False False ... False False False False False False False False False False
3 False False False False False False False False False False ... False False False False False False False False False False
4 False False False False False False False False False False ... False False False False False False False False False False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
190 False False False False False False False False False False ... False False False False False False False False False False
191 False False False False False False False False False False ... False False False False False False False False False False
192 False False False False False False False False False False ... False False False False False False False False False False
193 False False False False False False False False False False ... False False False False False False False False False False
194 False False False False False False False False False False ... False False False False False False False False False False

195 rows × 24 columns

In [17]:
display(df.isna().sum())
name                0
MDVP:Fo(Hz)         0
MDVP:Fhi(Hz)        0
MDVP:Flo(Hz)        0
MDVP:Jitter(%)      0
MDVP:Jitter(Abs)    0
MDVP:RAP            0
MDVP:PPQ            0
Jitter:DDP          0
MDVP:Shimmer        0
MDVP:Shimmer(dB)    0
Shimmer:APQ3        0
Shimmer:APQ5        0
MDVP:APQ            0
Shimmer:DDA         0
NHR                 0
HNR                 0
status              0
RPDE                0
DFA                 0
spread1             0
spread2             0
D2                  0
PPE                 0
dtype: int64
In [18]:
import matplotlib
print(matplotlib.get_backend())  # To check the current backend
matplotlib.use('module://matplotlib_inline.backend_inline')  # To set the backend explicitly
module://matplotlib_inline.backend_inline
In [19]:
plt.figure(figsize=(10, 6))
df.status.hist()
plt.xlabel('Status')
plt.ylabel('Frequencies')
plt.plot()
plt.show()
In [20]:
corr = df.corr()
display(corr)
C:\Users\mannr\AppData\Local\Temp\ipykernel_22856\543053642.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  corr = df.corr()
MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer MDVP:Shimmer(dB) ... Shimmer:DDA NHR HNR status RPDE DFA spread1 spread2 D2 PPE
MDVP:Fo(Hz) 1.000000 0.400985 0.596546 -0.118003 -0.382027 -0.076194 -0.112165 -0.076213 -0.098374 -0.073742 ... -0.094732 -0.021981 0.059144 -0.383535 -0.383894 -0.446013 -0.413738 -0.249450 0.177980 -0.372356
MDVP:Fhi(Hz) 0.400985 1.000000 0.084951 0.102086 -0.029198 0.097177 0.091126 0.097150 0.002281 0.043465 ... -0.003733 0.163766 -0.024893 -0.166136 -0.112404 -0.343097 -0.076658 -0.002954 0.176323 -0.069543
MDVP:Flo(Hz) 0.596546 0.084951 1.000000 -0.139919 -0.277815 -0.100519 -0.095828 -0.100488 -0.144543 -0.119089 ... -0.150737 -0.108670 0.210851 -0.380200 -0.400143 -0.050406 -0.394857 -0.243829 -0.100629 -0.340071
MDVP:Jitter(%) -0.118003 0.102086 -0.139919 1.000000 0.935714 0.990276 0.974256 0.990276 0.769063 0.804289 ... 0.746635 0.906959 -0.728165 0.278220 0.360673 0.098572 0.693577 0.385123 0.433434 0.721543
MDVP:Jitter(Abs) -0.382027 -0.029198 -0.277815 0.935714 1.000000 0.922911 0.897778 0.922913 0.703322 0.716601 ... 0.697170 0.834972 -0.656810 0.338653 0.441839 0.175036 0.735779 0.388543 0.310694 0.748162
MDVP:RAP -0.076194 0.097177 -0.100519 0.990276 0.922911 1.000000 0.957317 1.000000 0.759581 0.790652 ... 0.744919 0.919521 -0.721543 0.266668 0.342140 0.064083 0.648328 0.324407 0.426605 0.670999
MDVP:PPQ -0.112165 0.091126 -0.095828 0.974256 0.897778 0.957317 1.000000 0.957319 0.797826 0.839239 ... 0.763592 0.844604 -0.731510 0.288698 0.333274 0.196301 0.716489 0.407605 0.412524 0.769647
Jitter:DDP -0.076213 0.097150 -0.100488 0.990276 0.922913 1.000000 0.957319 1.000000 0.759555 0.790621 ... 0.744901 0.919548 -0.721494 0.266646 0.342079 0.064026 0.648328 0.324377 0.426556 0.671005
MDVP:Shimmer -0.098374 0.002281 -0.144543 0.769063 0.703322 0.759581 0.797826 0.759555 1.000000 0.987258 ... 0.987626 0.722194 -0.835271 0.367430 0.447424 0.159954 0.654734 0.452025 0.507088 0.693771
MDVP:Shimmer(dB) -0.073742 0.043465 -0.119089 0.804289 0.716601 0.790652 0.839239 0.790621 0.987258 1.000000 ... 0.963202 0.744477 -0.827805 0.350697 0.410684 0.165157 0.652547 0.454314 0.512233 0.695058
Shimmer:APQ3 -0.094717 -0.003743 -0.150747 0.746625 0.697153 0.744912 0.763580 0.744894 0.987625 0.963198 ... 1.000000 0.716207 -0.827123 0.347617 0.435242 0.151124 0.610967 0.402243 0.467265 0.645377
Shimmer:APQ5 -0.070682 -0.009997 -0.101095 0.725561 0.648961 0.709927 0.786780 0.709907 0.982835 0.973751 ... 0.960072 0.658080 -0.813753 0.351148 0.399903 0.213873 0.646809 0.457195 0.502174 0.702456
MDVP:APQ -0.077774 0.004937 -0.107293 0.758255 0.648793 0.737455 0.804139 0.737439 0.950083 0.960977 ... 0.896647 0.694019 -0.800407 0.364316 0.451379 0.157276 0.673158 0.502188 0.536869 0.721694
Shimmer:DDA -0.094732 -0.003733 -0.150737 0.746635 0.697170 0.744919 0.763592 0.744901 0.987626 0.963202 ... 1.000000 0.716215 -0.827130 0.347608 0.435237 0.151132 0.610971 0.402223 0.467261 0.645389
NHR -0.021981 0.163766 -0.108670 0.906959 0.834972 0.919521 0.844604 0.919548 0.722194 0.744477 ... 0.716215 1.000000 -0.714072 0.189429 0.370890 -0.131882 0.540865 0.318099 0.470949 0.552591
HNR 0.059144 -0.024893 0.210851 -0.728165 -0.656810 -0.721543 -0.731510 -0.721494 -0.835271 -0.827805 ... -0.827130 -0.714072 1.000000 -0.361515 -0.598736 -0.008665 -0.673210 -0.431564 -0.601401 -0.692876
status -0.383535 -0.166136 -0.380200 0.278220 0.338653 0.266668 0.288698 0.266646 0.367430 0.350697 ... 0.347608 0.189429 -0.361515 1.000000 0.308567 0.231739 0.564838 0.454842 0.340232 0.531039
RPDE -0.383894 -0.112404 -0.400143 0.360673 0.441839 0.342140 0.333274 0.342079 0.447424 0.410684 ... 0.435237 0.370890 -0.598736 0.308567 1.000000 -0.110950 0.591117 0.479905 0.236931 0.545886
DFA -0.446013 -0.343097 -0.050406 0.098572 0.175036 0.064083 0.196301 0.064026 0.159954 0.165157 ... 0.151132 -0.131882 -0.008665 0.231739 -0.110950 1.000000 0.195668 0.166548 -0.165381 0.270445
spread1 -0.413738 -0.076658 -0.394857 0.693577 0.735779 0.648328 0.716489 0.648328 0.654734 0.652547 ... 0.610971 0.540865 -0.673210 0.564838 0.591117 0.195668 1.000000 0.652358 0.495123 0.962435
spread2 -0.249450 -0.002954 -0.243829 0.385123 0.388543 0.324407 0.407605 0.324377 0.452025 0.454314 ... 0.402223 0.318099 -0.431564 0.454842 0.479905 0.166548 0.652358 1.000000 0.523532 0.644711
D2 0.177980 0.176323 -0.100629 0.433434 0.310694 0.426605 0.412524 0.426556 0.507088 0.512233 ... 0.467261 0.470949 -0.601401 0.340232 0.236931 -0.165381 0.495123 0.523532 1.000000 0.480585
PPE -0.372356 -0.069543 -0.340071 0.721543 0.748162 0.670999 0.769647 0.671005 0.693771 0.695058 ... 0.645389 0.552591 -0.692876 0.531039 0.545886 0.270445 0.962435 0.644711 0.480585 1.000000

23 rows × 23 columns

In [21]:
plt.figure(figsize=(10, 6))
sns.barplot(x="status",y="HNR",data=df);
plt.show()
In [22]:
display(df)
name MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer ... Shimmer:DDA NHR HNR status RPDE DFA spread1 spread2 D2 PPE
0 phon_R01_S01_1 119.992 157.302 74.997 0.00784 0.00007 0.00370 0.00554 0.01109 0.04374 ... 0.06545 0.02211 21.033 1 0.414783 0.815285 -4.813031 0.266482 2.301442 0.284654
1 phon_R01_S01_2 122.400 148.650 113.819 0.00968 0.00008 0.00465 0.00696 0.01394 0.06134 ... 0.09403 0.01929 19.085 1 0.458359 0.819521 -4.075192 0.335590 2.486855 0.368674
2 phon_R01_S01_3 116.682 131.111 111.555 0.01050 0.00009 0.00544 0.00781 0.01633 0.05233 ... 0.08270 0.01309 20.651 1 0.429895 0.825288 -4.443179 0.311173 2.342259 0.332634
3 phon_R01_S01_4 116.676 137.871 111.366 0.00997 0.00009 0.00502 0.00698 0.01505 0.05492 ... 0.08771 0.01353 20.644 1 0.434969 0.819235 -4.117501 0.334147 2.405554 0.368975
4 phon_R01_S01_5 116.014 141.781 110.655 0.01284 0.00011 0.00655 0.00908 0.01966 0.06425 ... 0.10470 0.01767 19.649 1 0.417356 0.823484 -3.747787 0.234513 2.332180 0.410335
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
190 phon_R01_S50_2 174.188 230.978 94.261 0.00459 0.00003 0.00263 0.00259 0.00790 0.04087 ... 0.07008 0.02764 19.517 0 0.448439 0.657899 -6.538586 0.121952 2.657476 0.133050
191 phon_R01_S50_3 209.516 253.017 89.488 0.00564 0.00003 0.00331 0.00292 0.00994 0.02751 ... 0.04812 0.01810 19.147 0 0.431674 0.683244 -6.195325 0.129303 2.784312 0.168895
192 phon_R01_S50_4 174.688 240.005 74.287 0.01360 0.00008 0.00624 0.00564 0.01873 0.02308 ... 0.03804 0.10715 17.883 0 0.407567 0.655683 -6.787197 0.158453 2.679772 0.131728
193 phon_R01_S50_5 198.764 396.961 74.904 0.00740 0.00004 0.00370 0.00390 0.01109 0.02296 ... 0.03794 0.07223 19.020 0 0.451221 0.643956 -6.744577 0.207454 2.138608 0.123306
194 phon_R01_S50_6 214.289 260.277 77.973 0.00567 0.00003 0.00295 0.00317 0.00885 0.01884 ... 0.03078 0.04398 21.209 0 0.462803 0.664357 -5.724056 0.190667 2.555477 0.148569

195 rows × 24 columns

In [23]:
df.drop(['name'],axis=1,inplace=True)
display (df)
MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer MDVP:Shimmer(dB) ... Shimmer:DDA NHR HNR status RPDE DFA spread1 spread2 D2 PPE
0 119.992 157.302 74.997 0.00784 0.00007 0.00370 0.00554 0.01109 0.04374 0.426 ... 0.06545 0.02211 21.033 1 0.414783 0.815285 -4.813031 0.266482 2.301442 0.284654
1 122.400 148.650 113.819 0.00968 0.00008 0.00465 0.00696 0.01394 0.06134 0.626 ... 0.09403 0.01929 19.085 1 0.458359 0.819521 -4.075192 0.335590 2.486855 0.368674
2 116.682 131.111 111.555 0.01050 0.00009 0.00544 0.00781 0.01633 0.05233 0.482 ... 0.08270 0.01309 20.651 1 0.429895 0.825288 -4.443179 0.311173 2.342259 0.332634
3 116.676 137.871 111.366 0.00997 0.00009 0.00502 0.00698 0.01505 0.05492 0.517 ... 0.08771 0.01353 20.644 1 0.434969 0.819235 -4.117501 0.334147 2.405554 0.368975
4 116.014 141.781 110.655 0.01284 0.00011 0.00655 0.00908 0.01966 0.06425 0.584 ... 0.10470 0.01767 19.649 1 0.417356 0.823484 -3.747787 0.234513 2.332180 0.410335
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
190 174.188 230.978 94.261 0.00459 0.00003 0.00263 0.00259 0.00790 0.04087 0.405 ... 0.07008 0.02764 19.517 0 0.448439 0.657899 -6.538586 0.121952 2.657476 0.133050
191 209.516 253.017 89.488 0.00564 0.00003 0.00331 0.00292 0.00994 0.02751 0.263 ... 0.04812 0.01810 19.147 0 0.431674 0.683244 -6.195325 0.129303 2.784312 0.168895
192 174.688 240.005 74.287 0.01360 0.00008 0.00624 0.00564 0.01873 0.02308 0.256 ... 0.03804 0.10715 17.883 0 0.407567 0.655683 -6.787197 0.158453 2.679772 0.131728
193 198.764 396.961 74.904 0.00740 0.00004 0.00370 0.00390 0.01109 0.02296 0.241 ... 0.03794 0.07223 19.020 0 0.451221 0.643956 -6.744577 0.207454 2.138608 0.123306
194 214.289 260.277 77.973 0.00567 0.00003 0.00295 0.00317 0.00885 0.01884 0.190 ... 0.03078 0.04398 21.209 0 0.462803 0.664357 -5.724056 0.190667 2.555477 0.148569

195 rows × 23 columns

In [24]:
x = df.drop(labels=['status'], axis =1)
print(x)
display (x.head())
     MDVP:Fo(Hz)  MDVP:Fhi(Hz)  MDVP:Flo(Hz)  MDVP:Jitter(%)  \
0        119.992       157.302        74.997         0.00784   
1        122.400       148.650       113.819         0.00968   
2        116.682       131.111       111.555         0.01050   
3        116.676       137.871       111.366         0.00997   
4        116.014       141.781       110.655         0.01284   
..           ...           ...           ...             ...   
190      174.188       230.978        94.261         0.00459   
191      209.516       253.017        89.488         0.00564   
192      174.688       240.005        74.287         0.01360   
193      198.764       396.961        74.904         0.00740   
194      214.289       260.277        77.973         0.00567   

     MDVP:Jitter(Abs)  MDVP:RAP  MDVP:PPQ  Jitter:DDP  MDVP:Shimmer  \
0             0.00007   0.00370   0.00554     0.01109       0.04374   
1             0.00008   0.00465   0.00696     0.01394       0.06134   
2             0.00009   0.00544   0.00781     0.01633       0.05233   
3             0.00009   0.00502   0.00698     0.01505       0.05492   
4             0.00011   0.00655   0.00908     0.01966       0.06425   
..                ...       ...       ...         ...           ...   
190           0.00003   0.00263   0.00259     0.00790       0.04087   
191           0.00003   0.00331   0.00292     0.00994       0.02751   
192           0.00008   0.00624   0.00564     0.01873       0.02308   
193           0.00004   0.00370   0.00390     0.01109       0.02296   
194           0.00003   0.00295   0.00317     0.00885       0.01884   

     MDVP:Shimmer(dB)  ...  MDVP:APQ  Shimmer:DDA      NHR     HNR      RPDE  \
0               0.426  ...   0.02971      0.06545  0.02211  21.033  0.414783   
1               0.626  ...   0.04368      0.09403  0.01929  19.085  0.458359   
2               0.482  ...   0.03590      0.08270  0.01309  20.651  0.429895   
3               0.517  ...   0.03772      0.08771  0.01353  20.644  0.434969   
4               0.584  ...   0.04465      0.10470  0.01767  19.649  0.417356   
..                ...  ...       ...          ...      ...     ...       ...   
190             0.405  ...   0.02745      0.07008  0.02764  19.517  0.448439   
191             0.263  ...   0.01879      0.04812  0.01810  19.147  0.431674   
192             0.256  ...   0.01667      0.03804  0.10715  17.883  0.407567   
193             0.241  ...   0.01588      0.03794  0.07223  19.020  0.451221   
194             0.190  ...   0.01373      0.03078  0.04398  21.209  0.462803   

          DFA   spread1   spread2        D2       PPE  
0    0.815285 -4.813031  0.266482  2.301442  0.284654  
1    0.819521 -4.075192  0.335590  2.486855  0.368674  
2    0.825288 -4.443179  0.311173  2.342259  0.332634  
3    0.819235 -4.117501  0.334147  2.405554  0.368975  
4    0.823484 -3.747787  0.234513  2.332180  0.410335  
..        ...       ...       ...       ...       ...  
190  0.657899 -6.538586  0.121952  2.657476  0.133050  
191  0.683244 -6.195325  0.129303  2.784312  0.168895  
192  0.655683 -6.787197  0.158453  2.679772  0.131728  
193  0.643956 -6.744577  0.207454  2.138608  0.123306  
194  0.664357 -5.724056  0.190667  2.555477  0.148569  

[195 rows x 22 columns]
MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer MDVP:Shimmer(dB) ... MDVP:APQ Shimmer:DDA NHR HNR RPDE DFA spread1 spread2 D2 PPE
0 119.992 157.302 74.997 0.00784 0.00007 0.00370 0.00554 0.01109 0.04374 0.426 ... 0.02971 0.06545 0.02211 21.033 0.414783 0.815285 -4.813031 0.266482 2.301442 0.284654
1 122.400 148.650 113.819 0.00968 0.00008 0.00465 0.00696 0.01394 0.06134 0.626 ... 0.04368 0.09403 0.01929 19.085 0.458359 0.819521 -4.075192 0.335590 2.486855 0.368674
2 116.682 131.111 111.555 0.01050 0.00009 0.00544 0.00781 0.01633 0.05233 0.482 ... 0.03590 0.08270 0.01309 20.651 0.429895 0.825288 -4.443179 0.311173 2.342259 0.332634
3 116.676 137.871 111.366 0.00997 0.00009 0.00502 0.00698 0.01505 0.05492 0.517 ... 0.03772 0.08771 0.01353 20.644 0.434969 0.819235 -4.117501 0.334147 2.405554 0.368975
4 116.014 141.781 110.655 0.01284 0.00011 0.00655 0.00908 0.01966 0.06425 0.584 ... 0.04465 0.10470 0.01767 19.649 0.417356 0.823484 -3.747787 0.234513 2.332180 0.410335

5 rows × 22 columns

In [25]:
y = df['status']
display(y.head())
0    1
1    1
2    1
3    1
4    1
Name: status, dtype: int64
In [26]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=40)
print (x.shape,y.shape)
print(x_train.shape,x_test.shape,y_train.shape,y_test.shape)
(195, 22) (195,)
(156, 22) (39, 22) (156,) (39,)
In [27]:
print(x_train)
     MDVP:Fo(Hz)  MDVP:Fhi(Hz)  MDVP:Flo(Hz)  MDVP:Jitter(%)  \
147      183.520       216.814       161.340         0.01466   
86       178.222       202.450       141.047         0.00321   
179      148.143       155.982       135.041         0.00392   
69       148.090       162.824        67.343         0.00762   
125      145.174       198.109        80.637         0.00733   
..           ...           ...           ...             ...   
50       124.445       135.069       117.495         0.00431   
184      116.848       217.552        99.503         0.00531   
165      236.200       244.663       102.137         0.00277   
7        107.332       113.840       104.315         0.00290   
70       142.729       162.408        65.476         0.00831   

     MDVP:Jitter(Abs)  MDVP:RAP  MDVP:PPQ  Jitter:DDP  MDVP:Shimmer  \
147           0.00008   0.00849   0.00819     0.02546       0.06050   
86            0.00002   0.00163   0.00194     0.00488       0.03759   
179           0.00003   0.00204   0.00231     0.00612       0.01450   
69            0.00005   0.00467   0.00354     0.01400       0.05428   
125           0.00005   0.00414   0.00422     0.01242       0.02362   
..                ...       ...       ...         ...           ...   
50            0.00003   0.00141   0.00167     0.00422       0.02184   
184           0.00005   0.00260   0.00346     0.00780       0.01795   
165           0.00001   0.00154   0.00153     0.00462       0.02448   
7             0.00003   0.00144   0.00182     0.00431       0.01567   
70            0.00006   0.00469   0.00419     0.01407       0.03485   

     MDVP:Shimmer(dB)  ...  MDVP:APQ  Shimmer:DDA      NHR     HNR      RPDE  \
147             0.618  ...   0.06359      0.08595  0.06057  14.367  0.478024   
86              0.327  ...   0.02784      0.06219  0.03151  15.924  0.598714   
179             0.131  ...   0.01263      0.02175  0.00540  23.683  0.398499   
69              0.497  ...   0.03635      0.10070  0.02431  21.718  0.487407   
125             0.233  ...   0.01944      0.03706  0.01874  18.857  0.637518   
..                ...  ...       ...          ...      ...     ...       ...   
50              0.197  ...   0.01685      0.03724  0.00479  25.135  0.553134   
184             0.163  ...   0.01756      0.02429  0.01179  22.085  0.663842   
165             0.217  ...   0.01621      0.04231  0.00620  24.078  0.469928   
7               0.134  ...   0.01256      0.02487  0.00344  26.892  0.637420   
70              0.365  ...   0.02949      0.05605  0.02599  20.264  0.489345   

          DFA   spread1   spread2        D2       PPE  
147  0.768974 -4.276605  0.355736  3.142364  0.336085  
86   0.712199 -6.366916  0.335753  2.654271  0.144614  
179  0.778349 -5.711205  0.240875  2.845109  0.192730  
69   0.727313 -6.261141  0.120956  2.137075  0.141958  
125  0.735546 -5.594275  0.127950  1.765957  0.222716  
..        ...       ...       ...       ...       ...  
50   0.775933 -6.650471  0.254498  1.840198  0.103561  
184  0.656516 -5.198864  0.206768  2.120412  0.252404  
165  0.628232 -6.816086  0.172270  2.235197  0.119652  
7    0.763262 -6.167603  0.183721  2.064693  0.163755  
70   0.730387 -5.720868  0.158830  2.277927  0.180828  

[156 rows x 22 columns]
In [28]:
print(y_train)
147    1
86     1
179    1
69     1
125    1
      ..
50     0
184    0
165    0
7      1
70     1
Name: status, Length: 156, dtype: int64
In [29]:
log_reg = LogisticRegression().fit(x_train, y_train)

train_preds = log_reg.predict(x_train)

print("Model accuracy on train is: ", accuracy_score(y_train, train_preds))


test_preds = log_reg.predict(x_test)

print("Model accuracy on test is: ", accuracy_score(y_test, test_preds))
print('*'*50)

print("confusion_matrix train is:\n ", confusion_matrix(y_train, train_preds))
print("confusion_matrix test is:\n ", confusion_matrix(y_test, test_preds))
print('\nClassification Report Train is ')
print(classification_report (y_train, train_preds))
print('\nClassification Report Test is ')
print(classification_report (y_test, test_preds))
Model accuracy on train is:  0.8782051282051282
Model accuracy on test is:  0.8461538461538461
**************************************************
confusion_matrix train is:
  [[ 25  15]
 [  4 112]]
confusion_matrix test is:
  [[ 5  3]
 [ 3 28]]

Classification Report Train is 
              precision    recall  f1-score   support

           0       0.86      0.62      0.72        40
           1       0.88      0.97      0.92       116

    accuracy                           0.88       156
   macro avg       0.87      0.80      0.82       156
weighted avg       0.88      0.88      0.87       156


Classification Report Test is 
              precision    recall  f1-score   support

           0       0.62      0.62      0.62         8
           1       0.90      0.90      0.90        31

    accuracy                           0.85        39
   macro avg       0.76      0.76      0.76        39
weighted avg       0.85      0.85      0.85        39

C:\Users\mannr\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
In [30]:
full_pred = log_reg.predict(x)
print(accuracy_score(y,full_pred))
0.8717948717948718
In [31]:
RF=RandomForestClassifier().fit(x_train,y_train)
 
train_preds2 = RF.predict(x_train)

print("Model accuracy on train is: ", accuracy_score(y_train, train_preds2))


test_preds2 = RF.predict(x_test)

print("Model accuracy on test is: ", accuracy_score(y_test, test_preds2))


print("confusion_matrix train is:\n ", confusion_matrix(y_train, train_preds2))
print("confusion_matrix test is:\n ", confusion_matrix(y_test, test_preds2))
print('\nClassification Report Train is ')
print(classification_report (y_train, train_preds2))
print('\nClassification Report Test is ')
print(classification_report (y_test, test_preds2))
Model accuracy on train is:  1.0
Model accuracy on test is:  0.8717948717948718
confusion_matrix train is:
  [[ 40   0]
 [  0 116]]
confusion_matrix test is:
  [[ 5  3]
 [ 2 29]]

Classification Report Train is 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       1.00      1.00      1.00       116

    accuracy                           1.00       156
   macro avg       1.00      1.00      1.00       156
weighted avg       1.00      1.00      1.00       156


Classification Report Test is 
              precision    recall  f1-score   support

           0       0.71      0.62      0.67         8
           1       0.91      0.94      0.92        31

    accuracy                           0.87        39
   macro avg       0.81      0.78      0.79        39
weighted avg       0.87      0.87      0.87        39

In [32]:
final2 = RF.predict(x)
print(accuracy_score(y, final2))
0.9743589743589743
In [33]:
print((y_test !=test_preds2).sum(),'/',((y_test == test_preds2).sum()+(y_test != test_preds2).sum()))
5 / 39
In [34]:
from sklearn.naive_bayes import GaussianNB

NB=GaussianNB()
NB.fit(x_train,y_train)

train_preds4 = NB.predict(x_train)

print("Model accuracy on train is: ", accuracy_score(y_train, train_preds4))

test_preds4 = NB.predict(x_test)

print("Model accuracy on test is: ", accuracy_score(y_test, test_preds4))
print('*'*50)

print("confusion_matrix train is: \n", confusion_matrix(y_train, train_preds4))
print("confusion_matrix test is:\n ", confusion_matrix(y_test, test_preds4))
print('Wrong predictions out of total')
print('*'*50)
print('\nClassification Report Train is ')
print(classification_report (y_train, train_preds4))
print('\nClassification Report Test is ')
print(classification_report (y_test, test_preds4))
Model accuracy on train is:  0.7307692307692307
Model accuracy on test is:  0.6923076923076923
**************************************************
confusion_matrix train is: 
 [[38  2]
 [40 76]]
confusion_matrix test is:
  [[ 8  0]
 [12 19]]
Wrong predictions out of total
**************************************************

Classification Report Train is 
              precision    recall  f1-score   support

           0       0.49      0.95      0.64        40
           1       0.97      0.66      0.78       116

    accuracy                           0.73       156
   macro avg       0.73      0.80      0.71       156
weighted avg       0.85      0.73      0.75       156


Classification Report Test is 
              precision    recall  f1-score   support

           0       0.40      1.00      0.57         8
           1       1.00      0.61      0.76        31

    accuracy                           0.69        39
   macro avg       0.70      0.81      0.67        39
weighted avg       0.88      0.69      0.72        39

In [35]:
from sklearn.ensemble import RandomForestClassifier
RF = RandomForestClassifier(n_estimators=100, criterion='entropy')
RF.fit(x_train,y_train)
RF_pred = RF.predict(x)
print (RF_pred)

print('\n Accuracy Score')
print (accuracy_score (y,RF_pred))
print('\nClassification Report')
print(classification_report (y,RF_pred))
print('Confusion Matrix')
print (confusion_matrix (y,RF_pred))
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1
 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0
 1 0 1 0 0 0 0 0 0 0]

 Accuracy Score
0.9794871794871794

Classification Report
              precision    recall  f1-score   support

           0       0.96      0.96      0.96        48
           1       0.99      0.99      0.99       147

    accuracy                           0.98       195
   macro avg       0.97      0.97      0.97       195
weighted avg       0.98      0.98      0.98       195

Confusion Matrix
[[ 46   2]
 [  2 145]]
In [36]:
from sklearn.neighbors import KNeighborsClassifier

KNN = KNeighborsClassifier().fit(x_train,y_train)
 
train_preds5 = KNN.predict(x_train)

print("Model accuracy on train is: ", accuracy_score(y_train, train_preds5))


test_preds5 = KNN.predict(x_test)

print("Model accuracy on test is: ", accuracy_score(y_test, test_preds5))
print('*'*50)

print("confusion_matrix train is:\n ", confusion_matrix(y_train, train_preds5))
print("confusion_matrix test is:\n ", confusion_matrix(y_test, test_preds5))
print('Wrong predictions out of total')
print('*'*50)
print('\nClassification Report Train is ')
print(classification_report (y_train, train_preds5))
print('\nClassification Report Test is ')
print(classification_report (y_test, test_preds5))
Model accuracy on train is:  0.9102564102564102
Model accuracy on test is:  0.8461538461538461
**************************************************
confusion_matrix train is:
  [[ 30  10]
 [  4 112]]
confusion_matrix test is:
  [[ 4  4]
 [ 2 29]]
Wrong predictions out of total
**************************************************

Classification Report Train is 
              precision    recall  f1-score   support

           0       0.88      0.75      0.81        40
           1       0.92      0.97      0.94       116

    accuracy                           0.91       156
   macro avg       0.90      0.86      0.88       156
weighted avg       0.91      0.91      0.91       156


Classification Report Test is 
              precision    recall  f1-score   support

           0       0.67      0.50      0.57         8
           1       0.88      0.94      0.91        31

    accuracy                           0.85        39
   macro avg       0.77      0.72      0.74        39
weighted avg       0.84      0.85      0.84        39

In [37]:
final_preds5 = KNN.predict(x)
print("Accuracy of full dataset is : ", accuracy_score(y,final_preds5))
Accuracy of full dataset is :  0.8974358974358975
In [38]:
print((y_test !=test_preds5).sum(),'/',((y_test == test_preds5).sum()+(y_test != test_preds5).sum()))

print('*'*50)

print('KappaScore is: ', metrics.cohen_kappa_score(y_test,test_preds5))
6 / 39
**************************************************
KappaScore is:  0.48
In [39]:
from sklearn.svm import SVC
SVM_SIG = SVC (kernel = 'sigmoid')
SVM_SIG.fit(x_train,y_train)

train_preds6=SVM_SIG.predict(x_train)
print("Accuracy of train data set is : ", accuracy_score(y_train,train_preds6))
print('*'*50)

SVM_SIG_pred = SVM_SIG.predict(x)
print (SVM_SIG_pred)
print('*'*50)

print('\n Accuracy Score')
print (accuracy_score (y,SVM_SIG_pred))
print('\nClassification Report')
print(classification_report (y,SVM_SIG_pred))
print('*'*50)

print('Confusion Matrix')
print (confusion_matrix (y,SVM_SIG_pred))
Accuracy of train data set is :  0.7435897435897436
**************************************************
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1]
**************************************************

 Accuracy Score
0.7538461538461538

Classification Report
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        48
           1       0.75      1.00      0.86       147

    accuracy                           0.75       195
   macro avg       0.38      0.50      0.43       195
weighted avg       0.57      0.75      0.65       195

**************************************************
Confusion Matrix
[[  0  48]
 [  0 147]]
C:\Users\mannr\anaconda3\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
C:\Users\mannr\anaconda3\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
C:\Users\mannr\anaconda3\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
In [40]:
from sklearn.svm import SVC
SVM_SIG = SVC (kernel = 'poly')
SVM_SIG.fit(x_train,y_train)

train_preds6=SVM_SIG.predict(x_train)
print("Accuracy of train data set is : ", accuracy_score(y_train,train_preds6))
print('*'*50)

SVM_SIG_pred = SVM_SIG.predict(x)
print (SVM_SIG_pred)
print('*'*50)

print('\n Accuracy Score')
print (accuracy_score (y,SVM_SIG_pred))
print('\nClassification Report')
print(classification_report (y,SVM_SIG_pred))
print('*'*50)

print('Confusion Matrix')
print (confusion_matrix (y,SVM_SIG_pred))
Accuracy of train data set is :  0.8205128205128205
**************************************************
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 1
 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1]
**************************************************

 Accuracy Score
0.8307692307692308

Classification Report
              precision    recall  f1-score   support

           0       0.89      0.35      0.51        48
           1       0.82      0.99      0.90       147

    accuracy                           0.83       195
   macro avg       0.86      0.67      0.70       195
weighted avg       0.84      0.83      0.80       195

**************************************************
Confusion Matrix
[[ 17  31]
 [  2 145]]
In [41]:
from sklearn.svm import SVC
SVM_SIG = SVC (kernel = 'sigmoid')
SVM_SIG.fit(x_train,y_train)

train_preds6=SVM_SIG.predict(x_train)
print("Accuracy of train data set is : ", accuracy_score(y_train,train_preds6))
print('*'*50)

SVM_SIG_pred = SVM_SIG.predict(x)
print (SVM_SIG_pred)
print('*'*50)

print('\n Accuracy Score')
print (accuracy_score (y,SVM_SIG_pred))
print('\nClassification Report')
print(classification_report (y,SVM_SIG_pred))
print('*'*50)

print('Confusion Matrix')
print (confusion_matrix (y,SVM_SIG_pred))
Accuracy of train data set is :  0.7435897435897436
**************************************************
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1]
**************************************************

 Accuracy Score
0.7538461538461538

Classification Report
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        48
           1       0.75      1.00      0.86       147

    accuracy                           0.75       195
   macro avg       0.38      0.50      0.43       195
weighted avg       0.57      0.75      0.65       195

**************************************************
Confusion Matrix
[[  0  48]
 [  0 147]]
C:\Users\mannr\anaconda3\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
C:\Users\mannr\anaconda3\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
C:\Users\mannr\anaconda3\lib\site-packages\sklearn\metrics\_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
In [42]:
from sklearn.svm import SVC
SVM_SIG = SVC (kernel = 'rbf')
SVM_SIG.fit(x_train,y_train)

train_preds6=SVM_SIG.predict(x_train)
print("Accuracy of train data set is : ", accuracy_score(y_train,train_preds6))
print('*'*50)

SVM_SIG_pred = SVM_SIG.predict(x)
print (SVM_SIG_pred)
print('*'*50)

print('\n Accuracy Score')
print (accuracy_score (y,SVM_SIG_pred))
print('\nClassification Report')
print(classification_report (y,SVM_SIG_pred))
print('*'*50)

print('Confusion Matrix')
print (confusion_matrix (y,SVM_SIG_pred))
Accuracy of train data set is :  0.8141025641025641
**************************************************
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1]
**************************************************

 Accuracy Score
0.8153846153846154

Classification Report
              precision    recall  f1-score   support

           0       0.93      0.27      0.42        48
           1       0.81      0.99      0.89       147

    accuracy                           0.82       195
   macro avg       0.87      0.63      0.65       195
weighted avg       0.84      0.82      0.77       195

**************************************************
Confusion Matrix
[[ 13  35]
 [  1 146]]
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: